* This file takes 2011-2013 ED data and sets up the data for analysis

set more off

** Run the loop for the 3 years of data that we process (2011-12, 2012-13 and 2013-14)
forval i=1(1)3{

	* Map the loop counter (1, 2, 3) to the source-file suffix and to the
	* calendar year in which that data year starts:
	*   1 -> 1112 / 2011,  2 -> 1213 / 2012,  3 -> 1314 / 2013
	local datayear : word `i' of 1112 1213 1314
	local year = 2010 + `i'


* load the raw attendance file for this data year
use "$sourcedata/AE_`datayear'", clear

* patient identifier: copy encrypted_hesid into extract_hesid; capture lets the
* script continue if this gen fails (e.g. extract_hesid already exists)
cap gen extract_hesid = encrypted_hesid

* admission and discharge indicators, from the attendance disposal code
gen admit     = (aeattenddisp==1)
gen discharge = inlist(aeattenddisp, 2, 3)

set more off
* date variables
* finyear holds the calendar year in which the data year starts
gen finyear=`year'

* arrivaldate arrives as a string; characters 1-4, 6-7 and 9-10 are read as
* year, month and day and combined into a Stata date
gen arrivalyear = substr(arrivaldate,1,4)
gen arrivalmonth = substr(arrivaldate,6,2)
gen arrivalday = substr(arrivaldate,9,2)
destring arrivalyear arrivalmonth arrivalday, replace
gen arrivaldate2 = mdy(arrivalmonth, arrivalday, arrivalyear)
drop arrivaldate

* exit date: no exit date is read from the data, so it is derived from the
* arrival date, the clock times and the duration in the department (depdur, minutes)
*   - departure clock time earlier than arrival clock time -> left on a later day (+1)
*   - as above AND more than 24 hours (1440 min) in the department -> +2 days
* Both offsets are taken from arrivaldate2 so that they do not accumulate
* (the previous sequential replaces produced arrival + 3 in the second case).
* Stata treats missing as larger than any number, so missing depdur is excluded
* explicitly; otherwise it would satisfy depdur>1440.
gen exitdate2 = arrivaldate2
replace exitdate2 = arrivaldate2 + 1 if deptime<arrivaltime
replace exitdate2 = arrivaldate2 + 2 if deptime<arrivaltime & depdur>1440 & !missing(depdur)

* arrivaldate2 -> arrivaldate, exitdate2 -> exitdate
rename *date2 *date

* hour of arrival and hour of departure, taken from the numeric clock times:
* a 4-character value gives a 2-digit hour, a 3-character value a 1-digit hour,
* and anything shorter is treated as hour 0
* NOTE(review): a missing time is presumably rendered as "." by tostring
* (length 1) and would therefore also be coded as hour 0 - confirm
foreach stem in arrival dep {
	tostring `stem'time, gen(hhmm_str)
	gen `stem'hour = "00" if strlen(hhmm_str)<3
	replace `stem'hour = substr(hhmm_str,1,1) if strlen(hhmm_str)==3
	replace `stem'hour = substr(hhmm_str,1,2) if strlen(hhmm_str)==4
	destring `stem'hour, replace
	drop hhmm_str
	}

* departure variables are referred to as "exit" from here on
rename (dephour deptime) (exithour exittime)

* hospital codes
rename procode site_code

* gender: 1 if sex is 1, 0 if sex is 2, missing for every other value
gen d_male = (sex==1) if inlist(sex, 1, 2)

* age: values above 7000 are recoded to 0; missing age stays missing
gen d_age = cond(arrivalage==., ., cond(arrivalage>7000, 0, arrivalage))

* white: 1 if ethnos is "A", missing if ethnos is "Z" or empty
* NOTE(review): the earlier comment here read "removed (missing data)", but the
* variable is still created and saved - confirm whether it is used downstream
gen d_white = (ethnos=="A") if ethnos!="Z" & ethnos!=""

* urban: 1 if rururb_ind is 5, missing if rururb_ind is 9 or missing
gen d_urban = (rururb_ind==5) if rururb_ind!=. & rururb_ind!=9

* ambulance: 1 if aearrivalmode is 1, missing if aearrivalmode is 9 or missing
gen d_ambulance = (aearrivalmode==1) if aearrivalmode!=9 & aearrivalmode!=.

set more off
* diagnosis variables - cleans, counts diagnoses and stores primary diagnosis code

* strip the zero padding so the variables are diag2_1 ... diag2_12
rename diag2_0* diag2_*

* numeric copies d1-d12; codes equal to 0 or above 39 are set to missing
local dxlist
forv k = 1/12 {
	destring diag2_`k', replace force
	gen d`k' = diag2_`k'
	replace d`k' = . if d`k'==0 | d`k'>39
	local dxlist `dxlist' d`k'
	}

* fill an empty primary diagnosis from the first later non-empty slot, and
* blank any later slot that repeats the primary diagnosis
forv k = 2/12 {
	replace d1 = d`k' if missing(d1) & !missing(d`k')
	replace d`k' = . if d`k'==d1 & !missing(d`k')
	}

* number of recorded diagnoses; missing when no codes are recorded
egen dcount = rownonmiss(`dxlist')
replace dcount = . if dcount==0
* code 39 is "nothing", so it does not count as a diagnosis
foreach v of local dxlist {
	replace dcount = dcount - 1 if `v'==39
	}

* treatment variables - cleans, counts treatments and stores primary treatment code

* strip the zero padding so the variables are treat2_1 ... treat2_12
rename treat2_0* treat2_*

* numeric copies t1-t12; codes equal to 0, or above 57 other than 99, are set to missing
local txlist
forv k = 1/12 {
	destring treat2_`k', replace force
	gen t`k' = treat2_`k'
	replace t`k' = . if t`k'==0 | (t`k'>57 & t`k'!=99)
	local txlist `txlist' t`k'
	}

* fill an empty primary treatment from the first later non-empty slot, and
* blank any later slot that repeats the primary treatment
forv k = 2/12 {
	replace t1 = t`k' if missing(t1) & !missing(t`k')
	replace t`k' = . if t`k'==t1 & !missing(t`k')
	}

* number of recorded treatments; missing when no codes are recorded
egen tcount = rownonmiss(`txlist')
replace tcount = . if tcount==0
* code 99 is "none", so it does not count as a treatment
foreach v of local txlist {
	replace tcount = tcount - 1 if `v'==99
	}

* investigation variables - cleans, counts number of ED investigations and stores primary investigation codes
* NOTE(review): unlike diag2_* and treat2_*, no zero-padding rename is applied
* here; presumably the variables already arrive as invest2_1 ... invest2_12 - confirm

* numeric copies i1-i12; codes equal to 0, or above 24 other than 99, are set to missing
local ixlist
forv k = 1/12 {
	destring invest2_`k', replace force
	gen i`k' = invest2_`k'
	replace i`k' = . if i`k'==0 | (i`k'>24 & i`k'!=99)
	local ixlist `ixlist' i`k'
	}

* fill an empty primary investigation from the first later non-empty slot, and
* blank any later slot that repeats the primary investigation
forv k = 2/12 {
	replace i1 = i`k' if missing(i1) & !missing(i`k')
	replace i`k' = . if i`k'==i1 & !missing(i`k')
	}

* number of recorded investigations; missing when no codes are recorded
egen icount = rownonmiss(`ixlist')
replace icount = . if icount==0
* code 24 is "none", so it does not count as an investigation
foreach v of local ixlist {
	replace icount = icount - 1 if `v'==24
	}

	
* flag one row per site; "count if tag" after each step displays how many
* flagged rows are still in the data
egen tag = tag(site_code)
count if tag

* exclusions: keep only sites whose code starts with "R" (drops non-acute trusts)
keep if substr(site_code,1,1)=="R"
count if tag

* exclusions: drop small sites (average monthly arrivals of less than 500)
* countpat is the number of rows in each site-year-month cell; meancountpat
* averages countpat over all rows of the site
bys site_code finyear arrivalmonth: egen countpat = count(arrivalhour)
bys site_code: egen meancountpat = mean(countpat)
keep if meancountpat>=500
count if tag

* exclusions: keep hospital-years with full data
* sumtag2 is the number of distinct arrival months observed for the site
gen month = mofd(arrivaldate)
egen tag2 = tag(month site_code)
bys site_code: egen sumtag2 = total(tag2)
keep if sumtag2>=12
count if tag

* exclusions: miscellaneous data errors
keep if exittime!=4000	// 4000 is an invalid exit time

* duplicate visits: same patient, site, arrival date and exit hour
duplicates drop extract_hesid site_code arrivaldate exithour, force
count if tag

* save: keep the analysis variables, label them and write one file per data year
keep extract_hesid site_code finyear arrivaldate arrivalhour arrivaltime exitdate exithour exittime *time aeattenddisp aearrivalmode admit discharge *dur *1 *2 *3 *4 *5 *6 *7 *8 *9 *10 *11 *12 *count d_* lsoa01 aedepttype sushrg

* the *1 ... *12 patterns above also keep the raw code variables; drop them
* before compress so compress does not process variables that are discarded
drop invest* diag* treat*

lab var extract_hesid "Unique patient identifier"
label var aeattenddisp "Admission or discharge outcome"
* "replace" avoids an error if a value label of this name already exists
label define aeattenddisp 1 "Inpatient admission" 2 "Discharged - followup at GP" 3 " Discharged - no further treatment" 4 "Ref. to A&E clinic" 5 "Ref. to frac. clinic" 6 "Ref. to OP clinic" 7 "Transfer to other provider" 10 "Died in department" ///
11 "Ref. to other healthcare professional" 12 "Left dept. before being treated" 13 "Refused treatment" 14 "Other" 99 "Unknown", replace
label values aeattenddisp aeattenddisp
label var admit "=1 if patient admitted as an inpatient"
label var discharge "=1 if patient discharged from A&E with no further hosp treatment"
label var initdur "Minutes between arrival and initial assessment"
label var tretdur "Minutes between arrival and initial treatment"
label var concldur "Minutes between arrival and conclusion of attendance"
label var depdur "Minutes between arrival and A&e dept. no longer being responsible for pat"
label var arrivaldate "Date of arrival, date format"
label var arrivaltime "Time of arrival, HHMM"
label var inittime "Time of initial assessment/triage, HHMM"
label var trettime "Time that treatment commences, HHMM"
label var concltime "Time that A&e treatment concludes, HHMM"
label var exittime "Time that A&E no longer responsible for pat, HHMM"
label var site_code "Site code"
* label all 12 diagnosis / investigation / treatment slots; the slot number is
* part of the label, so this has to run inside a loop that defines `x'
forv x = 1/12 {
	lab var d`x' "A&E diagnosis `x'"
	lab var i`x' "A&E investigation `x'"
	lab var t`x' "A&E treatment `x'"
	}
lab var dcount "Count of A&E diagnosis codes"
lab var tcount "Count of A&E treatment codes"
lab var icount "Count of A&E investigation codes"
label var sushrg "HRG code for ER appearance"
compress

save "$saveddata/data_ae`year'", replace
}


* Create index files for A&E attendees to be used in later files - stores IDs of everyone who attended a major ED in 11-12 and 12-13

* stack the 2011 and 2012 attendance files
use "$saveddata/data_ae2011", clear
append using "$saveddata/data_ae2012"

* Restrict to attendances with aedepttype==1 BEFORE picking one row per patient.
* Numbering rows with _n over all of a patient's attendances and then keeping
* row 1 only if it is type 1 drops patients whose first-sorted row is another
* department type, and the outcome depends on the arbitrary within-patient order.
keep if aedepttype==1
bys extract_hesid: keep if _n==1

* x is a constant marker (=1) identifying patients in the index
gen x = 1
keep extract_hesid x

save "$saveddata/ae_index_ids.dta", replace
